# -*- coding: utf-8 -*-
import copy
import json
from datetime import datetime

import scrapy
from scrapy import Request

from zc_core.model.items import Box
from zc_core.spiders.base import BaseSpider
from zc_core.util.batch_gen import time_to_batch_no
from ctg.utils.login import SeleniumLogin
from ctg.rules import *
from zc_core.spiders.base import BaseSpider


class SkuSpider(BaseSpider):
    """Crawl the SKU listings served by ego.ctg.com.cn.

    Flow, as implemented below:

    1. ``start_requests`` fetches the category tree.
    2. ``parse_total_page`` emits the categories and requests page 1 of the
       SKU list for every category that ``parse_catalog`` returns as "to do".
    3. ``parse_sku_list`` emits the SKUs of a page.  On page 1 it also reads
       the total page count: within ``max_page_limit`` the remaining pages
       are requested; beyond it the query is narrowed by price range
       (first by the ranges offered in the response, then by splitting the
       current range) and each narrower query restarts at page 1.
    """

    name = 'sku'
    # General industrial products channel (not referenced in this class).
    index_url = 'https://ego.ctg.com.cn/mall-business/specialareab/channel/query/42'
    # All product categories.
    all_goods_url = 'https://ego.ctg.com.cn/mall-basedoc/ware_category/channel/0?token=1622171799219'
    # Product SKU list.
    sku_list_url = 'https://ego.ctg.com.cn/mall-basedoc/search/mallProduct/allMallProductWithAuth?searchType=2&addrCode=undefined&restrictedArea=25'

    # Browser User-Agent sent with every request built by this spider.
    # Deliberately not named ``user_agent`` so it cannot be mistaken for a
    # framework-level spider attribute.
    _BROWSER_UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/90.0.4430.72 Safari/537.36 Edg/90.0.818.42'
    # Keys of a price-filter dict that are stripped before the filter is
    # sent to the server as ``aggFilterParam``.
    _PRICE_FILTER_LOCAL_KEYS = ('termCount', 'startPrice', 'endPrice')

    def __init__(self, batchNo=None, *args, **kwargs):
        """Set paging limits, the level-1 category whitelist and login cookies.

        :param batchNo: batch number, forwarded unchanged to ``BaseSpider``.
        """
        super().__init__(batchNo=batchNo, *args, **kwargs)
        self.page_size = 20
        # A query reporting more pages than this is narrowed by price range
        # instead of being paged through (see ``parse_sku_list``).
        self.max_page_limit = 100
        # NOTE(review): stock Scrapy binds ``settings`` after ``__init__``;
        # this relies on BaseSpider making it available here -- confirm.
        # ``or {}`` keeps a setting explicitly set to None from crashing.
        white_list = self.settings.get('CAT1_WHITE_LIST') or {}
        self.cat1_white_list = list(white_list.keys())
        # The login runs once, when the spider is constructed.
        self.cookies = SeleniumLogin().get_cookies()

    def _build_filter_params(self, price_filter=None):
        """Return the ``aggFilterParam`` list for a price filter.

        :param price_filter: price-filter dict, or None / empty for no filter.
        :return: ``[]`` without a filter, otherwise a one-element list holding
            a copy of the filter without the keys in
            ``_PRICE_FILTER_LOCAL_KEYS``.  The argument is never mutated.
        """
        if not price_filter:
            return []
        agg_filter = {
            key: value
            for key, value in price_filter.items()
            if key not in self._PRICE_FILTER_LOCAL_KEYS
        }
        return [agg_filter]

    def _build_list_req(self, cat3_id, page, price_filter=None, callback=None):
        """Build the POST request for one page of a category's SKU list.

        :param cat3_id: category id, sent as ``cateCode``.
        :param page: 1-based page number; ``page - 1`` is sent in the body.
        :param price_filter: optional price-filter dict narrowing the query.
        :param callback: response callback; defaults to ``parse_sku_list``.
        :return: the ``Request``; ``meta`` carries the 1-based page, the
            category id and a shallow copy of the price filter.
        """
        # Normalise here so that meta always carries a dict, never None.
        price_filter = price_filter or {}
        payload = {
            'cateCode': cat3_id,
            'channelId': 0,
            'page': page - 1,
            'pageSize': self.page_size,
            'isComputePS': True,
            'filterParam': [],
            'aggFilterParam': self._build_filter_params(price_filter),
        }
        return Request(
            method='POST',
            url=self.sku_list_url,
            body=json.dumps(payload),
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'Content-Type': 'application/json',
                'Accept': 'application/json, text/javascript, */*; q=0.01',
                'User-Agent': self._BROWSER_UA,
            },
            meta={
                'reqType': 'sku',
                'batchNo': self.batch_no,
                'page': page,
                'catalog3Id': cat3_id,
                'priceFilter': copy.copy(price_filter),
            },
            cookies=self.cookies,
            callback=callback or self.parse_sku_list,
            errback=self.error_back,
            dont_filter=True
        )

    def start_requests(self):
        """Yield the single seed request, which fetches the category tree."""
        yield Request(
            url=self.all_goods_url,
            meta={
                'batchNo': self.batch_no,
            },
            callback=self.parse_total_page,
            errback=self.error_back,
            dont_filter=True,
            headers={
                'X-Requested-With': 'XMLHttpRequest',
                'User-Agent': self._BROWSER_UA,
            }
        )

    def parse_total_page(self, response):
        """Handle the category response.

        Despite its name this callback parses categories: it yields one
        ``Box('catalog', ...)`` holding them, then a page-1 SKU-list request
        per category in ``todo_cats``.  Nothing is yielded when
        ``parse_catalog`` returns no categories.
        """
        cats, todo_cats = parse_catalog(response, self.cat1_white_list)
        if not cats:
            return

        self.logger.info('品类: count[%s]' % len(cats))
        yield Box('catalog', self.batch_no, cats)

        for cat in todo_cats:
            # Start crawling this category's SKU list.
            yield self._build_list_req(cat3_id=cat.get('catalogId'), page=1)

    def parse_sku_list(self, response):
        """Handle one SKU-list page.

        Yields a ``Box('sku', ...)`` with the parsed SKUs.  For page 1 it
        additionally yields follow-up requests: pages ``2..total_pages`` when
        the total is within ``max_page_limit``, otherwise the narrower
        page-1 queries produced by ``_narrow_by_price``.  An empty page is
        logged and yields nothing.
        """
        meta = response.meta
        cat3_id = meta.get('catalog3Id')
        cur_page = meta.get('page')
        # ``or {}`` also covers a meta entry that is present but None.
        price_filter = meta.get('priceFilter') or {}
        price_range = price_filter.get("termValue", "")

        sku_list = parse_sku(response)
        if not sku_list:
            self.logger.info(f'空页: cat={cat3_id}, filter={price_range}, page={cur_page}')
            return

        self.logger.info(f'清单: cat={cat3_id}, page={cur_page}, filter={price_range}, cnt={len(sku_list)}')
        yield Box('sku', self.batch_no, sku_list)

        # Only the first page of a query schedules follow-up requests.
        if cur_page != 1:
            return

        # The bare name resolves to the module-level ``parse_total_page``
        # helper, not to the method of the same name on this class.
        total_pages = parse_total_page(response)
        if total_pages > self.max_page_limit:
            self.logger.info(f'超限: cat={cat3_id}, filter={price_range}, total_page={total_pages}')
            yield from self._narrow_by_price(response, cat3_id, total_pages, price_filter, price_range)
            return

        self.logger.info(f'页数: cat={cat3_id}, filter={price_range}, total_page={total_pages}')
        for page in range(2, total_pages + 1):
            yield self._build_list_req(cat3_id=cat3_id, page=page, price_filter=price_filter)

    def _narrow_by_price(self, response, cat3_id, total_pages, price_filter, price_range):
        """Yield page-1 requests that narrow an over-limit query by price.

        Without a current ``price_filter`` one request is yielded per price
        range returned by ``parse_price_range``.  With one, the filter is
        split in two by ``split_filter`` and a request is yielded for each
        non-empty half; a missing first half is logged as unsplittable.
        """
        if not price_filter:
            # Not filtered by price yet: fan out over the offered ranges.
            price_range_list = parse_price_range(response)
            if price_range_list:
                self.logger.info(f'区间: cat={cat3_id}, price_range={len(price_range_list)}')
                for ft in price_range_list:
                    self.logger.info(f'扩维: cat={cat3_id}, filter={ft.get("termValue", "")}')
                    yield self._build_list_req(cat3_id=cat3_id, page=1, price_filter=ft)
            return

        # Already filtered by price: split the current range.
        f1, f2 = split_filter(price_filter)
        if f1:
            self.logger.info(f'分割1: cat={cat3_id}, filter={f1.get("termValue", "")}')
            yield self._build_list_req(cat3_id=cat3_id, page=1, price_filter=f1)
        else:
            self.logger.info(
                '维度：价格：状态：异常【无法进行分页】 总页数: cat=%s, total_page=%s, filter=%s' % (
                    cat3_id, total_pages, price_range))
        if f2:
            self.logger.info(f'分割2: cat={cat3_id}, filter={f2.get("termValue", "")}')
            yield self._build_list_req(cat3_id=cat3_id, page=1, price_filter=f2)
